def track_specific_terms(df, terms_dict, text_column="full_text", whole_word=True):
    """Count mentions of term groups within each COVID period.

    For every distinct value of ``df['covid_period']`` the text of all rows in
    that period is lower-cased and joined into one string. Each term is then
    counted in that string, and the per-category total is normalized by the
    period's whitespace-delimited word count.

    Args:
        df: DataFrame with a ``covid_period`` column and a text column.
        terms_dict: Mapping of category name to an iterable of terms. Terms
            may be multi-word phrases; matching is case-insensitive.
        text_column: Name of the column holding the text to search.
        whole_word: When True (default), a term only matches where it is not
            glued to other word characters, so "rent" does not match inside
            "parent" and "end" does not match inside "friend". Pass False
            for plain substring counting.

    Returns:
        ``{period: {category: {"total_count": int,
        "frequency_per_1000": float, "term_breakdown": {term: int}}}}``.
        ``frequency_per_1000`` is 0 when the period contains no words.
    """
    import re  # local import keeps this block self-contained

    def count_term(text, term):
        needle = term.lower()
        if not needle:
            # An empty term would "match" at every position.
            return 0
        if not whole_word:
            return text.count(needle)
        # Lookarounds rather than \b so terms that start or end with
        # punctuation are still bounded correctly.
        pattern = r"(?<!\w)" + re.escape(needle) + r"(?!\w)"
        return len(re.findall(pattern, text))

    results = {}
    for period in df['covid_period'].unique():
        period_data = df[df['covid_period'] == period]

        # One lower-cased string for the whole period; missing values become
        # empty strings and non-string cells are coerced to text.
        all_text = ' '.join(
            period_data[text_column].fillna('').astype(str).str.lower()
        )
        total_words = len(all_text.split())

        period_results = {}
        for category, terms in terms_dict.items():
            term_counts = {term: count_term(all_text, term) for term in terms}
            # The category total is the sum of the breakdown, so a term
            # listed twice is only counted once.
            category_count = sum(term_counts.values())
            period_results[category] = {
                "total_count": category_count,
                "frequency_per_1000": (
                    (category_count / total_words * 1000) if total_words > 0 else 0
                ),
                'term_breakdown': term_counts,
            }

        results[period] = period_results

    return results
This function counts occurrences of specific terms within each COVID period. It calculates both raw counts and normalized frequencies (per 1000 words), which allows fair comparison across periods with different post volumes. The function joins all text for a period, counts each term, and provides detailed breakdowns.
# Print a per-period summary of COVID-related term usage.
print("COVID-Related Term Frequencies by Period")
print("=" * 60)

for period in ['Pre-COVID', 'During COVID', 'Post-COVID']:
    # Periods absent from the tracking results are skipped.
    if period not in covid_tracking:
        continue

    print(f"\n{period}:")
    for category, data in covid_tracking[period].items():
        print(f"\n{category}:")
        print(f" Total mentions: {data['total_count']}")
        print(f" Per 1000 words: {data['frequency_per_1000']:.2f}")

        # Take the three highest-count terms, then drop any that never occur.
        ranked = sorted(
            data['term_breakdown'].items(),
            key=lambda pair: pair[1],
            reverse=True,
        )
        mentioned = [(term, count) for term, count in ranked[:3] if count > 0]
        if mentioned:
            print(" Most mentioned:")
            for term, count in mentioned:
                print(f" - {term}: {count}")
COVID-Related Term Frequencies by Period
============================================================
Pre-COVID:
pandemic_terms:
Total mentions: 3
Per 1000 words: 0.05
Most mentioned:
- virus: 2
- coronavirus: 1
lockdown_terms:
Total mentions: 91
Per 1000 words: 1.47
Most mentioned:
- alone: 81
- isolated: 8
- isolation: 1
remote_terms:
Total mentions: 26
Per 1000 words: 0.42
Most mentioned:
- online: 14
- video: 10
- remote: 1
health_safety:
Total mentions: 5
Per 1000 words: 0.08
Most mentioned:
- vaccine: 2
- distance: 2
- mask: 1
therapy_terms:
Total mentions: 69
Per 1000 words: 1.12
Most mentioned:
- therapist: 38
- therapy: 30
- counseling: 1
During COVID:
pandemic_terms:
Total mentions: 661
Per 1000 words: 0.56
Most mentioned:
- covid: 378
- pandemic: 180
- virus: 71
lockdown_terms:
Total mentions: 1298
Per 1000 words: 1.09
Most mentioned:
- alone: 978
- quarantine: 88
- lockdown: 70
remote_terms:
Total mentions: 723
Per 1000 words: 0.61
Most mentioned:
- video: 300
- online: 297
- remote: 45
health_safety:
Total mentions: 200
Per 1000 words: 0.17
Most mentioned:
- mask: 82
- distance: 64
- vaccine: 37
therapy_terms:
Total mentions: 1523
Per 1000 words: 1.28
Most mentioned:
- therapy: 719
- therapist: 643
- counseling: 154
Post-COVID:
pandemic_terms:
Total mentions: 147
Per 1000 words: 0.16
Most mentioned:
- covid: 97
- pandemic: 35
- virus: 13
lockdown_terms:
Total mentions: 897
Per 1000 words: 0.96
Most mentioned:
- alone: 762
- isolated: 62
- isolation: 33
remote_terms:
Total mentions: 453
Per 1000 words: 0.49
Most mentioned:
- video: 205
- online: 186
- remote: 26
health_safety:
Total mentions: 106
Per 1000 words: 0.11
Most mentioned:
- distance: 51
- mask: 50
- vaccine: 4
therapy_terms:
Total mentions: 1085
Per 1000 words: 1.16
Most mentioned:
- therapy: 626
- therapist: 421
- counseling: 28
Code
# Specific Stressor Analysis
# Maps each stressor category to the terms counted for it.
# Keys must be unique: a repeated key in a dict literal silently replaces the
# earlier entry instead of raising an error.
stressor_terms = {
    "job_related": [
        "job", "work", "unemployed", "fired", "laid off",
        "career", "boss", "workplace", "employment",
    ],
    "social_isolation": [
        "alone", "lonely", "isolated", "friends",
        "social", "nobody", "anyone", "loneliness",
    ],
    # Hand-curated health vocabulary. This list used to share the
    # "health_anxiety" key with the clustering-derived list below and was
    # overwritten by it, so it now has its own key.
    "health_concerns": [
        "health", "sick", "illness", "symptoms",
        "doctor", "medical", "hospital", "disease",
    ],
    "financial_stress": [
        "money", "bills", "rent", "financial",
        "afford", "broke", "debt", "pay",
    ],
    "family_issues": [
        "family", "parents", "mother", "father",
        "mom", "dad", "siblings", "relationship",
    ],
    "academic_stress": [
        "school", "college", "university", "exam",
        "study", "student", "class", "grade",
    ],
    # Below are categories made from doing a machine learning clustering algorithm
    # trying to find patterns in the text that I couldn't have come up with
    "health_anxiety": [
        "heart", "symptoms", "panic attack", "panic attacks", "scared",
        "pain", "health", "anxious", "attack",
    ],
    "work_stress": ["job", "home", "house", "wfh", "remote", "work"],
    "school_stress": [
        "school", "parents", "mom", "dad",
        "remote school", "class", "online class",
    ],
    "burnout": ["tired", "anymore", "hate", "exhausted", "fucking tired", "end"],
}
# Print a per-period summary of stressor-related term usage.
print("\n\nStressor-Related Term Frequencies by Period")
print("=" * 60)

for period in ['Pre-COVID', 'During COVID', 'Post-COVID']:
    # Only report periods that are present in the tracking results.
    if period in stressor_tracking:
        print(f"\n{period}:")
        for category, data in stressor_tracking[period].items():
            print(f"\n{category}:")
            print(f" Total mentions: {data['total_count']}")
            print(f" Per 1000 words: {data['frequency_per_1000']:.2f}")

            # Rank terms by count (highest first) and keep the top three.
            by_count = sorted(
                data['term_breakdown'].items(),
                key=lambda item: item[1],
                reverse=True,
            )
            leaders = by_count[:3]

            # The heading is printed only when at least one leader was seen.
            has_mentions = False
            for _, count in leaders:
                if count > 0:
                    has_mentions = True
                    break
            if not has_mentions:
                continue

            print(" Most mentioned:")
            for term, count in leaders:
                if count > 0:
                    print(f" - {term}: {count}")
Stressor-Related Term Frequencies by Period
============================================================
Pre-COVID:
job_related:
Total mentions: 264
Per 1000 words: 4.27
Most mentioned:
- work: 165
- job: 79
- career: 8
social_isolation:
Total mentions: 398
Per 1000 words: 6.43
Most mentioned:
- friends: 132
- anyone: 102
- alone: 81
health_anxiety:
Total mentions: 370
Per 1000 words: 5.98
Most mentioned:
- health: 78
- attack: 55
- scared: 49
financial_stress:
Total mentions: 185
Per 1000 words: 2.99
Most mentioned:
- rent: 96
- money: 30
- pay: 24
family_issues:
Total mentions: 352
Per 1000 words: 5.69
Most mentioned:
- family: 87
- mom: 78
- relationship: 47
academic_stress:
Total mentions: 174
Per 1000 words: 2.81
Most mentioned:
- school: 81
- class: 20
- college: 17
work_stress:
Total mentions: 354
Per 1000 words: 5.72
Most mentioned:
- work: 165
- job: 79
- home: 66
school_stress:
Total mentions: 253
Per 1000 words: 4.09
Most mentioned:
- school: 81
- mom: 78
- parents: 46
burnout:
Total mentions: 589
Per 1000 words: 9.52
Most mentioned:
- end: 415
- hate: 75
- anymore: 66
During COVID:
job_related:
Total mentions: 4756
Per 1000 words: 4.01
Most mentioned:
- work: 3217
- job: 1195
- career: 132
social_isolation:
Total mentions: 6633
Per 1000 words: 5.59
Most mentioned:
- friends: 2158
- anyone: 2126
- alone: 978
health_anxiety:
Total mentions: 7629
Per 1000 words: 6.43
Most mentioned:
- health: 1515
- anxious: 1066
- attack: 1048
financial_stress:
Total mentions: 3707
Per 1000 words: 3.13
Most mentioned:
- rent: 2271
- money: 410
- broke: 379
family_issues:
Total mentions: 5533
Per 1000 words: 4.67
Most mentioned:
- mom: 1307
- family: 1262
- parents: 942
academic_stress:
Total mentions: 3588
Per 1000 words: 3.03
Most mentioned:
- school: 1456
- college: 517
- class: 467
work_stress:
Total mentions: 6149
Per 1000 words: 5.19
Most mentioned:
- work: 3217
- job: 1195
- home: 1059
school_stress:
Total mentions: 4738
Per 1000 words: 4.00
Most mentioned:
- school: 1456
- mom: 1307
- parents: 942
burnout:
Total mentions: 10167
Per 1000 words: 8.57
Most mentioned:
- end: 6449
- hate: 1485
- anymore: 1247
Post-COVID:
job_related:
Total mentions: 3787
Per 1000 words: 4.06
Most mentioned:
- work: 2512
- job: 1017
- career: 97
social_isolation:
Total mentions: 4932
Per 1000 words: 5.29
Most mentioned:
- anyone: 1655
- friends: 1465
- alone: 762
health_anxiety:
Total mentions: 6794
Per 1000 words: 7.29
Most mentioned:
- health: 1197
- attack: 956
- scared: 810
financial_stress:
Total mentions: 2751
Per 1000 words: 2.95
Most mentioned:
- rent: 1615
- money: 311
- broke: 292
family_issues:
Total mentions: 4386
Per 1000 words: 4.70
Most mentioned:
- family: 1066
- mom: 1008
- parents: 644
academic_stress:
Total mentions: 2547
Per 1000 words: 2.73
Most mentioned:
- school: 988
- college: 405
- class: 335
work_stress:
Total mentions: 4892
Per 1000 words: 5.25
Most mentioned:
- work: 2512
- job: 1017
- home: 835
school_stress:
Total mentions: 3450
Per 1000 words: 3.70
Most mentioned:
- mom: 1008
- school: 988
- parents: 644
burnout:
Total mentions: 8314
Per 1000 words: 8.92
Most mentioned:
- end: 5314
- hate: 1164
- anymore: 989
Visualize Changes in Stressors Over Time
Code
import pandas as pd

categories = list(stressor_terms.keys())
periods = ["Pre-COVID", "During COVID", "Post-COVID"]
period_order = list(periods)  # For sorting

# --- Create a "tidy" DataFrame ---
# One row per (period, category) pair. A period missing from the tracking
# results contributes no rows; a category missing within a period gets 0.
data_for_df = [
    {
        "category": category,
        "period": period,
        "frequency": stressor_tracking[period]
        .get(category, {})
        .get("frequency_per_1000", 0),
    }
    for period in periods
    if period in stressor_tracking
    for category in categories
]

# Create the DataFrame
# NOTE(review): this rebinds `df`; if an earlier cell uses `df` for the source
# data it is replaced here. Confirm that is intended.
df = pd.DataFrame(data_for_df)
Code
import plotly.express as px
import plotly.io as pio

pio.renderers.default = "notebook"

# Display names for the axes and legend; the "period" entry becomes the
# legend title.
axis_labels = {
    "category": "Stressor Category",
    "frequency": "Frequency per 1000 words",
    "period": "COVID Period",
}

# Grouped bars: one cluster per stressor category, one bar per period,
# with periods kept in the order given by period_order.
fig = px.bar(
    df,
    x="category",
    y="frequency",
    color="period",
    barmode="group",
    title="Stressor Mentions Across COVID Periods",
    labels=axis_labels,
    category_orders={"period": period_order},
)

# Sizing, angled tick labels, a faint horizontal grid, and extra bottom
# margin for the rotated labels.
fig.update_layout(
    width=1200,
    height=600,
    xaxis_tickangle=45,
    yaxis_gridcolor='rgba(0,0,0,0.1)',
    margin={"b": 120},
)

fig.show()
This creates an interactive grouped bar chart using Plotly, allowing comparison of stressor frequencies across the three periods. Each stressor category has three bars (one per period), making visual comparison easy.
Chart Interpretation
Burnout (tallest bars across all periods):
Pre-COVID: ~9.5
During COVID: ~8.6 (slight decrease)
Post-COVID: ~9.0 (increased again)
Shows burnout as the dominant stressor throughout: it dipped during COVID and partially rebounded afterward, though the post-COVID rate remains below the pre-COVID level.
Health Anxiety (notable upward trend):
Pre-COVID: ~6.0
During COVID: ~6.4
Post-COVID: ~7.3 (highest point!)
The green bar (Post-COVID) exceeds both others, showing that health-anxiety terms were mentioned proportionally more often after the acute pandemic phase than before or during it.
Social Isolation (declining trend):
Pre-COVID: ~6.4 (highest)
During COVID: ~5.6
Post-COVID: ~5.3 (lowest)
Paradoxical pattern: proportionally mentioned less during/after lockdowns, possibly because discourse broadened to other COVID-related topics.